--- %%NOBANNER%% -->
/*------------------<--- Start of Description -->--------------------\
| Returns the leverage residuals matrix L for a proportional hazards |
| model. The returned residuals data set is n by p, where n is the |
| number of (non-missing) subjects used in the PHREG fit, and p is |
| the number of fitted covariates. The leverage residuals are an |
| approximation to the jackknife; the i-th observation of the dataset|
| contains the approximate change in beta if observation i were |
| dropped from the model. These residuals will normally be plotted |
| as an aid to finding influential or "high leverage" observations. |
| Some data sets, particularily those with recurring events, may |
| contain multiple observations for a single individual. The macro |
| has the option of "collapsing" the matrix of leverage residuals |
| so that the output has only one observation per unique individual. |
| The procedure can also estimate the robust covariance estimate |
| L'L. When there are multiple, possibly correlated, observations |
| per subject this option combined with the collapsing mentioned |
| above, gives an estimate of variance that is corrected for the |
| correlation. |
|--------------------<--- End of Description -->---------------------|
|--------------------------------------------------------------------|
|--------------<--- Start of Files or Arguments Needed -->-----------|
| Arguments: |
| - Required: |
| data=input SAS data set name |
| time=time variable for survival. |
| This may be a single variable or an interval expressed |
| as (t1,t2). |
| event=event variable for survival, 1=event, 0=censored |
| xvars=list of covariates for Cox model |
| - Optional: |
| strata=stratification variable(one only) for stratifed Cox |
| models. |
| id=name of id variable to be included in the output dataset. |
| It must be numeric. |
| collapse= Y,N,T,F. If yes(Y,T), then the residual matrix is |
| collapsed (summed) based on unique values of the id |
| variable. Default is not to collapse(N). |
| outlev= name of the data set containing the leverage residuals.|
| The variables names will be the same as the covariate |
| names in the Cox model. The data set will also contain |
| the ID variable, if one was specified. Default data set |
| name is "phlev". |
| outvar = name of the output data set containing the robust |
| variance estimate. There is one observation and one |
| variable for each covariate. Default data set name is |
| "phvar". |
| plot = Y,N,T,F. If yes(Y,T), a plot of residuals for each |
| observation or ID value(if collapse=Y) is |
| produced. A separate plot is created for each |
| covariate. Default is N. |
| scaled = Y,N,T,F. If yes(Y,T), the scaled score residuals |
| are plotted. If N(default), the raw residuals |
| are used. |
| ref = Y,N,T,F. If yes(Y,T), reference lines are drawn on the |
| plots at +-se(beta). Default is N. |
| print = Y,N. If N, the phreg printout is suppressed. |
| Default is N |
| showlog = Y,N. If N, the SAS log is suppressed. |
| Default is N. |
| ties = Breslow,Discrete,Efron,Exact. Method to use for handling|
| failure time ties. Default is Breslow. |
| Output: The macro prints the PHREG output used to fit the Cox model|
| and a summary table including the Cox model betas, SE and chi- |
| square along with the robust SE and its associated chi-square. |
| It also includes the global Wald chi-square test based on the |
| leverage residuals. The summary table is available as a SAS |
| data set named _b_se. |
|---------------<--- End of Files or Arguments Needed -->------------|
|--------------------------------------------------------------------|
|----------------<--- Start of Example and Usage -->-----------------|
| Example: |
| %LET XX= RX1 RX2 RX3 RX4 NUMBER1 NUMBER2 NUMBER3 NUMBER4 SIZE1 |
| SIZE2 SIZE3 SIZE4; |
| %PHLEV(DATA=Bladder,TIME=time,EVENT=stat, |
| XVARS= &XX, STRATA=enum, ID=id, COLLAPSE=Y); |
| Usage: %phlev(data=,time=,event=,xvars=,strata=, id=, collapse=N, |
| outlev=phlev, outvar=phvar,plot=N, scaled=N, ref=N, |
| print=N, showlog=N, ties=breslow); |
| References: |
| The leverage residuals are discussed in Cain and Lange, |
| Biometrics 40:, 493-9 (1984). |
| The robust variance estimate L'L is derived in Lin and Wei, |
| JASA 84: 725-8 (1989). |
| Extension of this to correlated data, using the collapsed |
| leverage matrix is developed in Wei, Lin, and Weissfeld, JASA 84:|
| 1065-83, amoung others. |
| An overview of the methods is found in Mayo Clinic Tech Report58.|
\-------------------<--- End of Example and Usage -->---------------*/
%macro phlev(data=,time=,event=,xvars=,
strata=, id=, collapse=N, outlev=phlev, outvar=phvar,
plot=N, scaled=N, ref=N, print=N, showlog=N, ties=breslow);
/*--------------------------------------------\
| Author: E. Bergstralh, J. Kosanke and |
| T. Therneau |
| Created: September 8, 1993; |
| Purpose: return leverage residual matrix for|
| proportional hazard model; |
\--------------------------------------------*/
%if %upcase(&showlog)=N %then %do;
%if &sysscpl=Solaris %then %do;
proc printto log='/dev/null';
%end;
%if &sysscpl=MVS %then %do;
filename dummy '&&temp' disp=(new,delete,delete);
proc printto log=dummy;
%end;
%end;
%local t paren timeint timeint1 timeint2 i nxs sp;
%if %upcase(&collapse)=T %then %let collapse=Y;
%if %upcase(&collapse)=F %then %let collapse=N;
%if %upcase(&plot)=T %then %let plot=Y;
%if %upcase(&plot)=F %then %let plot=N;
%if %upcase(&scaled)=T %then %let scaled=Y;
%if %upcase(&scaled)=F %then %let scaled=N;
%if %upcase(&ref)=T %then %let ref=Y;
%if %upcase(&ref)=F %then %let ref=N;
%let paren=%str(%();
%if %index(&time,&paren)>0 %then %do;
%let timeint=1;
%let timeint1=%qscan(&time,1);
%let timeint2=%qscan(&time,2);
%end;
%else %let timeint=0;
footnote"phlev macro: event=&event time=&time strata=&strata id=&id collapse=
&collapse ties=&ties";
footnote2"Xvars= &xvars";
%let nxs=0; %**count the number of x vars;
%do i=1 %to 50;
%if %scan(&xvars,&i)= %then %goto done;
%let nxs=%eval(&nxs+1);
%end;
%done: %put &nxs;
%macro xlist(prefix); %**set-up var list code;
%local j;
%do j=1 %to &nxs; &prefix&j %end;
%mend xlist;
data _setup; set &data; **delete obs with missing values;
keep &id
%if &timeint=0 %then %do;
&time
%end;
%if &timeint=1 %then %do;
&timeint1 &timeint2
%end;
&event &strata &xvars;
xmiss=0;
%do i=1 %to &nxs;
xx=%scan(&xvars,&i);
if xx=. then xmiss=1;
%end;
if &event=.
%if &timeint=0 %then %do;
or &time=.
%end;
%if &timeint=1 %then %do;
or &timeint1=. or &timeint2=.
%end;
or xmiss=1 then delete;
** run phreg and grab output datasets;
proc phreg data=_setup noprint;
model &time*&event(0)= &xvars / maxiter=0 ties=&ties;
%if &strata ^= %then %do; strata &strata; %end;
output out=_out1(keep=&id dfb1-dfb&nxs) dfbeta=dfb1-dfb&nxs;
id &id;
%if %upcase(&collapse)=Y %then %do;
proc sort data=_out1 ; by &id; **collapse the dataset..sum within id;
proc means noprint; by &id; var dfb1-dfb&nxs;
output out=_out1 sum=dfb1-dfb&nxs;
run;
%end;
proc means noprint; var dfb1-dfb&nxs;
output out=_out2 sum=sum1-sum&nxs;
run;
proc phreg data=_setup covout outest=_est
%if %upcase(&print)=N %then %do;
noprint
%end;
;
model &time*&event(0)= &xvars / ties=&ties;
%if &strata ^= %then %do; strata &strata; %end;
output out=_sch ressco=s1-s&nxs;
id &id;
/* Check for bad model */
data _null_; set _est(where=(_type_='PARMS'));
bad=0;
%do i=1 %to &nxs;
if %scan(&xvars,&i)=0 then bad=1;
%end;
call symput('getout',bad);
run;
%if &getout=1 %then %do;
data _null_;
file print;
put 'Singular model - no robust solution produced';
put '%phlev will stop processing';
run;
%end;
%else %do;
data _sch5 ; set _sch(keep=&id &strata
%if &timeint=0 %then %do;
&time
%end;
%if &timeint=1 %then %do;
&timeint1 &timeint2
%end;
&event s1-s&nxs);
keep &strata &id
%if &timeint=0 %then %do;
&time
%end;
%if &timeint=1 %then %do;
&timeint1 &timeint2
%end;
&event &xvars;
label
%if &timeint=0 %then %do;
&time=
%end;
%if &timeint=1 %then %do;
&timeint1=
&timeint2=
%end;
&event= ;
%do i=1 %to &nxs; **rename scor_i vars to xvars;
%scan(&xvars,&i) =s&i;
%end;
*proc print data=_sch5 ;
*title3'Crude score residuals';
run;
%if %upcase(&collapse)=Y %then %do;
proc sort data=_sch5 ; by &id; **collapse the dataset..sum within id;
proc means noprint; by &id; var &xvars;
output out=_sch5 sum=&xvars;
*title3"Crude score residuals..collapsed(summed) over &id";
*proc print data=_sch5 ;
run;
%end;
proc iml; **Invoke IML to get L..scaled residuals;
use _sch5 ; setin _sch5 ;
read all var{ &xvars } into r;
%if &id ^= %then %do;
read all var{&id } into id_time;
%end;
use _est; setin _est;
read all var{&xvars } into covb where(_type_="COV ");
read var{&xvars } into beta where(_type_="PARMS");
L=r*covb; **Scaled score residuals;
LTL=L`*L; **Robust var matrix;
se_cox=sqrt(vecdiag(covb)); **SE from Cox model;
se_scr=sqrt(vecdiag(LTL )); **SE based on L'*L;
chisqcox=(beta` / se_cox)##2; **Chi-square Cox model;
chisqrob=(beta` / se_scr)##2; **Chi-square based on L'*L;
wald_x2=beta*ginv(LTL)*beta`; **Global Wald chi-square..robust;
eigv=eigval(LTL);
df=sum(eigv > .000000000001); **Wald degrees freedom;
p_wald= 1 - probchi(wald_x2,df); **Wald p-value;
idl= %if &id ^= %then %do; id_time || %end; L;
b_se= beta` || se_cox || se_scr || chisqcox || chisqrob;
wald= wald_x2 || df || p_wald;
use _out1; setin _out1;
read all var{%do i=1 %to &nxs; dfb&i %end; } into dzero;
use _out2; setin _out2;
read all var{%do i=1 %to &nxs; sum&i %end; } into temp;
roblr=temp*inv(dzero`*dzero)*temp`;
p_lr=1-probchi(roblr,df);
rob_lr=roblr || df || p_lr;
bl_1x5=j(1,5,.);
bl_px3=j(&nxs,3,.);
table= (b_se || bl_px3) // ( bl_1x5 || wald ) // ( bl_1x5 || rob_lr );
%let sp=%str( );
varn ={ %if &id^= %then %do; "&id" %end;
%do i=1 %to &nxs;
%let t= "%scan(&xvars,&i)" &sp ; &t
%end;
};
xnames={
%do i=1 %to &nxs;
%let t= "%scan(&xvars,&i)" &sp ; &t
%end;
};
xname ={
%do i=1 %to &nxs;
%let t= "%scan(&xvars,&i)" &sp ; &t
%end;
"wald" "robust score"
};
sen={ "b_cox" "se_cox" "se_robst" "chi_cox" "chi_rob"
"chi_w_lr" "df" "p_w_lr" };
create &outlev from idl[colname=varn ];
append from idl;
create &outvar from ltl[rowname=xnames colname=xnames];
append from ltl[rowname=xnames];
create _b_se from table[ rowname=xname colname=sen ];
append from table[rowname=xname ];
*show datasets;
*show contents;
quit; ** quit IML;
title4"Comparison of Cox model Beta, SE and chi-square to robust estimates";
title5"Wald chi-square is based on the robust estimates";
%if %upcase(&collapse)=Y %then %do;
title6"Robust SE is based on the collapsed(summed within &id) L matrix";
%end;
data _b_se; set _b_se;
if _n_<=&nxs then p=1-probchi(chi_rob,1);
if _n_>=&nxs+1 then p=p_w_lr;
label xname='Variable'
b_cox='Parameter/Estimate'
se_cox='SE'
se_robst='Robust/SE'
chi_cox='Chi-Square'
chi_rob='Robust/Chi-Square'
chi_w_lr='Chi-Square';
%if %upcase(&scaled)=Y %then %do i=1 %to &nxs;
if _n_=&i then do;
call symput("pvref&i",se_robst);
call symput("nvref&i",-1*se_robst);
end;
%end;
proc print data=_b_se label split='/';
id xname;
var b_cox se_cox se_robst chi_cox chi_rob chi_w_lr df p;
format chi_cox chi_rob chi_w_lr 7.3 p 6.4;
run;
footnote; symbol; title4;
%if %upcase(&plot)=Y %then %do;
%if %upcase(&scaled)=N %then %do;
%if &id= %then %do;
data _sch5; set _sch5;
obs=_n_;
%let id=obs;
%end;
proc plot data=_sch5;
plot (&xvars)*&id;
title4 'Plot of Raw Residuals';
%end;
%if %upcase(&scaled)=Y %then %do;
%if &id= %then %do;
data &outlev; set &outlev;
obs=_n_;
%let id=obs;
%end;
proc plot data=&outlev;
%do i=1 %to &nxs;
%let xvble=%scan(&xvars,&i);
plot &xvble*&id
%if %upcase(&ref)=Y %then %do;
/vref=&&pvref&i &&nvref&i
%end;
;
%end;
title4 'Plot of Scaled Residuals';
%end;
%end;
proc datasets nolist;
delete _setup _est _sch _sch5 _out1 _out2;
run;
quit;
%end;
%if %upcase(&showlog)=N %then %do;
proc printto; run;
%end;
%mend phlev ;